{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import pyprind\n",
    "import pandas as pd\n",
    "import os\n",
    "from nltk.corpus import stopwords\n",
    "import re\n",
    "import numpy as np\n",
    "\n",
    "stop = stopwords.words('english')\n",
    "\n",
    "def tokenizer(text):\n",
    "    text = re.sub('<[^>]*>', '', text)\n",
    "    emoticons = re.findall('(?::|;|=)(?:-)?(?:\\)|\\(|D|P)', text.lower())\n",
    "    text = re.sub('[\\W]+', ' ', text.lower()) +\\\n",
    "        ' '.join(emoticons).replace('-', '')\n",
    "    tokenized = [w for w in text.split() if w not in stop]\n",
    "    tokenized = text.split()\n",
    "    return tokenized\n",
    "\n",
    "train_data_path = 'data/labeledTrainData.tsv'\n",
    "df = pd.read_csv(train_data_path, sep=\"\\t\")\n",
    "df[\"review\"] = df[\"review\"].apply(lambda x : tokenizer(x))\n",
    "df.to_csv('data/movie_data.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 生成词向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0% [#######                       ] 100% | ETA: 00:00:14"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "vocabulary building finished, start training...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0% [###############               ] 100% | ETA: 00:00:12"
     ]
    }
   ],
   "source": [
    "import pyprind\n",
    "import gensim.models\n",
    "import re\n",
    "\n",
    "inpath = 'data/movie_data.csv'\n",
    "outpath = 'results/wordVectTrainResult'\n",
    "pbar = pyprind.ProgBar(100000)\n",
    "class csvStream(object):\n",
    "    def __init__(self,path):\n",
    "        self.path=path\n",
    "    def __iter__(self):\n",
    "        with open(self.path, 'r',) as csv:\n",
    "            next(csv)  # skip header\n",
    "            for line in csv:\n",
    "                text = line[4:-3]\n",
    "                text = re.sub('[\\'\\\"\\[\\]\\d\\b]','',text)   \n",
    "                while (text[0] == ',') or (text[0] == ' '):\n",
    "                    text = text[1:]\n",
    "                pbar.update()\n",
    "                yield text.split(', ')\n",
    "\n",
    "\n",
    "lineIterator = csvStream(inpath)\n",
    "model = gensim.models.Word2Vec()\n",
    "model.build_vocab(lineIterator)\n",
    "print('vocabulary building finished, start training...')\n",
    "model.train(lineIterator,total_examples=model.corpus_count,epochs=1)\n",
    "model.save(outpath)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 词向量测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 100 100\n",
      "0.9167262516432011\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/hz/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:23: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
      "/Users/hz/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:24: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
      "/Users/hz/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:25: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n"
     ]
    }
   ],
   "source": [
    "import gensim.models\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def cos_sim(vector_a, vector_b):\n",
    "    \"\"\"\n",
    "    计算两个向量之间的余弦相似度\n",
    "    :param vector_a: 向量 a \n",
    "    :param vector_b: 向量 b\n",
    "    :return: sim\n",
    "    \"\"\"\n",
    "    vector_a = np.mat(vector_a)\n",
    "    vector_b = np.mat(vector_b)\n",
    "    num = float(vector_a * vector_b.T)\n",
    "    denom = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)\n",
    "    cos = num / denom\n",
    "    sim = 0.5 + 0.5 * cos\n",
    "    return sim\n",
    "\n",
    "\n",
    "inpath = 'results/wordVectTrainResult'\n",
    "model = gensim.models.Word2Vec.load(inpath)\n",
    "test1 = model[\"good\"]\n",
    "test2 = model[\"nice\"]\n",
    "test3 = model[\"go\"]\n",
    "\n",
    "\n",
    "print(len(test1), len(test2),len(test3))\n",
    "print(cos_sim(test1,test2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 分类测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/hz/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:15: DeprecationWarning: Call to deprecated `__contains__` (Method will be removed in 4.0.0, use self.wv.__contains__() instead).\n",
      "  from ipykernel import kernelapp as app\n",
      "/Users/hz/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:17: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         id  sentiment                                             review  len\n",
      "0  12311_10          0  [-0.19330275617890003, 0.10081300452282421, 0....  100\n",
      "1    8348_2          0  [-0.07429290727502805, 0.12601268771738255, 0....  100\n",
      "2    5828_4          0  [0.11162658395491294, 0.00733450739030409, 0.3...  100\n",
      "3    7186_2          0  [-0.0369541522263067, 0.16518554098105856, 0.3...  100\n",
      "4   12128_7          0  [-0.09854314756706696, 0.14072058940038198, 0....  100\n",
      "5    2913_8          0  [0.034660419103091776, 0.05697532973075008, 0....  100\n",
      "6    4396_1          0  [0.17724977271983355, 0.12278373242634547, 0.4...  100\n",
      "7     395_2          0  [0.1945963272358248, 0.16014676212107132, 0.41...  100\n",
      "8   10616_1          0  [0.11036306295208022, -0.05364430498820348, 0....  100\n",
      "9    9074_9          0  [-0.0893094160754835, -0.01371210671004927, 0....  100\n"
     ]
    }
   ],
   "source": [
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import linear_model\n",
    "\n",
    "df1 = pd.read_csv(\"data/sampleSubmission.csv\", sep=\",\")\n",
    "df2 = pd.read_csv(\"data/testData.tsv\", sep=\"\\t\")\n",
    "df = pd.merge(df1, df2, on=\"id\", how=\"inner\")\n",
    "\n",
    "inpath = 'results/wordVectTrainResult'\n",
    "model = gensim.models.Word2Vec.load(inpath)\n",
    "\n",
    "def get_text_vector(text):\n",
    "    res = np.zeros([100])\n",
    "    for item in text:\n",
    "        if item not in model:\n",
    "            continue\n",
    "        res += model[item]\n",
    "    return res / len(text)\n",
    "\n",
    "df[\"review\"] = df[\"review\"].apply(lambda x : tokenizer(x))\n",
    "df[\"review\"] = df[\"review\"].apply(lambda x : get_text_vector(x))\n",
    "df[\"len\"] = df[\"review\"].apply(lambda x : len(x))\n",
    "\n",
    "print(df[:10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(18750,)\n",
      "(18750,)\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "setting an array element with a sequence.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-89-0e999f801bac>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMLPClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msolver\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'lbfgs'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1e-5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mhidden_layer_sizes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0mclf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m    980\u001b[0m         \"\"\"\n\u001b[1;32m    981\u001b[0m         return self._fit(X, y, incremental=(self.warm_start and\n\u001b[0;32m--> 982\u001b[0;31m                                             hasattr(self, \"classes_\")))\n\u001b[0m\u001b[1;32m    983\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    984\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36m_fit\u001b[0;34m(self, X, y, incremental)\u001b[0m\n\u001b[1;32m    321\u001b[0m                              hidden_layer_sizes)\n\u001b[1;32m    322\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 323\u001b[0;31m         \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mincremental\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    324\u001b[0m         \u001b[0mn_samples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_features\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    325\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/neural_network/multilayer_perceptron.py\u001b[0m in \u001b[0;36m_validate_input\u001b[0;34m(self, X, y, incremental)\u001b[0m\n\u001b[1;32m    917\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_validate_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mincremental\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    918\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[0;32m--> 919\u001b[0;31m                          multi_output=True)\n\u001b[0m\u001b[1;32m    920\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    921\u001b[0m             \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolumn_or_1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[0;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    717\u001b[0m                     \u001b[0mensure_min_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mensure_min_features\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    718\u001b[0m                     \u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwarn_on_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 719\u001b[0;31m                     estimator=estimator)\n\u001b[0m\u001b[1;32m    720\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m    494\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    495\u001b[0m                 \u001b[0mwarnings\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msimplefilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 496\u001b[0;31m                 \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    497\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    498\u001b[0m                 raise ValueError(\"Complex data not supported\\n\"\n",
      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/numpy/core/numeric.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m    536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    537\u001b[0m     \"\"\"\n\u001b[0;32m--> 538\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    539\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: setting an array element with a sequence."
     ]
    }
   ],
   "source": [
    "from sklearn.neural_network import MLPClassifier\n",
    "\n",
    "X, y = df[[\"review\"]].values.ravel(), df[\"sentiment\"].values.ravel()\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=1)\n",
    "print(X_train.shape)\n",
    "\n",
    "print(y_train.shape)\n",
    "clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5,), random_state=1)\n",
    "clf.fit(X_train, y_train)\n",
    "\n",
    "clf.predict_proba(X_test[:1])\n",
    "\n",
    "clf.predict(X_test[:5, :])\n",
    "\n",
    "clf.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2, 100)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,\n",
       "              beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
       "              hidden_layer_sizes=(5,), learning_rate='constant',\n",
       "              learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
       "              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,\n",
       "              random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,\n",
       "              validation_fraction=0.1, verbose=False, warm_start=False)"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5,), random_state=1)\n",
    "\n",
    "X_train = np.array([np.array([ 1.29321224e-01,  5.18960132e-02,  3.79400451e-01, -3.72608754e-01,\n",
    "       -2.18533632e-01,  2.59320804e-01,  5.39880085e-01,  5.16798938e-01,\n",
    "       -1.97540062e-01,  9.55656952e-01,  5.49076735e-01,  3.23896985e-01,\n",
    "       -3.69491834e-01,  1.37926866e-01, -9.34858046e-01,  3.49694259e-01,\n",
    "        5.02927222e-01,  7.72485569e-02,  2.41516394e-01,  4.35102434e-01,\n",
    "       -2.33707874e-01, -3.42638469e-01,  5.13357171e-01,  4.08318066e-01,\n",
    "       -5.63935861e-01,  3.31579943e-01, -1.00997857e-01,  8.32257381e-01,\n",
    "       -1.03186743e-01, -2.17901886e-01, -3.72759429e-01,  4.76303828e-01,\n",
    "        1.01170254e+00,  5.34718914e-01, -9.60375156e-02, -4.29679660e-01,\n",
    "        5.23644536e-01, -4.65647882e-01, -1.49980314e-01, -2.19136089e-02,\n",
    "        6.33299342e-03,  5.37065999e-01, -4.86698317e-01,  1.02540010e-01,\n",
    "        1.97397357e-01,  1.53287670e-01, -4.48417012e-01,  6.40119717e-01,\n",
    "        3.55489010e-01,  6.53285550e-02,  5.25642002e-01, -1.30566096e-03,\n",
    "        2.51422268e-01,  3.30942424e-01,  1.19990460e-01, -1.04565109e-01,\n",
    "       -1.92086261e-01, -1.31871328e+00,  2.32135119e-01,  2.22830327e-01,\n",
    "        4.79117552e-01, -5.23658381e-03,  4.88311444e-01, -1.47033792e-02,\n",
    "       -2.58355609e-01,  1.07908211e+00,  2.32782062e-01, -3.90683065e-02,\n",
    "       -3.77066846e-01,  7.92226757e-02, -1.03517982e+00,  4.02089629e-01,\n",
    "        6.82145484e-02, -2.45045518e-01,  2.72413841e-01, -6.30229306e-01,\n",
    "       -3.88759992e-01, -8.83834824e-01, -6.02251000e-01, -4.96690610e-01,\n",
    "       -3.37865016e-01, -3.01193769e-02,  6.50604504e-01, -7.07761998e-02,\n",
    "        4.53892187e-01,  5.53244393e-01,  3.83158314e-01, -8.42868587e-02,\n",
    "        6.80161905e-02,  1.01100661e-01, -3.39743594e-01,  2.16301889e-01,\n",
    "       -5.55453076e-01, -1.46315171e-01,  6.24522818e-01,  5.87731878e-01,\n",
    "       -1.10606850e-01, -1.29103243e-01, -1.77118074e-01, -3.12824513e-01]),\n",
    " np.array([ 0.14950982,  0.03738608,  0.42585563, -0.40345302, -0.23404879,\n",
    "        0.325457  ,  0.52644093,  0.31441613, -0.08783832,  0.85549325,\n",
    "        0.59543287,  0.37857117, -0.3135474 ,  0.03890331, -0.80587419,\n",
    "        0.32922168,  0.64687974,  0.1379085 ,  0.15211546,  0.418793  ,\n",
    "       -0.14782306, -0.26198192,  0.34435682,  0.51897987, -0.65181933,\n",
    "        0.35897195, -0.26711759,  0.8401758 , -0.17628074, -0.14719422,\n",
    "       -0.43270119,  0.56302991,  1.0217013 ,  0.52281307, -0.01557398,\n",
    "       -0.44794893,  0.64046578, -0.51360712, -0.04466807, -0.08671971,\n",
    "       -0.06077578,  0.49838649, -0.46617385,  0.02413524,  0.25871847,\n",
    "        0.1381659 , -0.44741985,  0.64232978,  0.48129942,  0.07564186,\n",
    "        0.37772529, -0.07793722,  0.28262221,  0.40180358,  0.06860969,\n",
    "       -0.17198053, -0.1013253 , -1.34171801,  0.24602981,  0.37648783,\n",
    "        0.51747068, -0.03582477,  0.50254968,  0.12601257, -0.13872963,\n",
    "        1.0529655 ,  0.21765285, -0.11891374, -0.32816064,  0.03736426,\n",
    "       -1.01552567,  0.30671988, -0.02039242, -0.1927615 ,  0.27264915,\n",
    "       -0.43618522, -0.4158235 , -0.8517373 , -0.54522136, -0.58439193,\n",
    "       -0.31247232, -0.15039204,  0.57947311, -0.12534831,  0.5562728 ,\n",
    "        0.59012879,  0.33703196, -0.1903191 ,  0.02046448,  0.09091111,\n",
    "       -0.39141423,  0.1063484 , -0.42469542, -0.12269154,  0.71052247,\n",
    "        0.65662416, -0.22396823, -0.09404335, -0.12750437, -0.21620624])])\n",
    "y_train = [1,0]\n",
    "\n",
    "print(X_train.shape)\n",
    "\n",
    "\n",
    "clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
